# Set Working Directory before starting
# Immcantation
suppressPackageStartupMessages(library(alakazam))
suppressPackageStartupMessages(library(shazam))
suppressPackageStartupMessages(library(tigger))
suppressPackageStartupMessages(library(dowser))
suppressPackageStartupMessages(library(glmGamPoi))
suppressPackageStartupMessages(library(Seurat))
suppressPackageStartupMessages(library(dittoSeq))
suppressPackageStartupMessages(library(reshape2))
suppressPackageStartupMessages(library(kableExtra))
suppressPackageStartupMessages(library(devtools))
suppressPackageStartupMessages(library(data.table))
suppressPackageStartupMessages(library(ggplot2))
suppressPackageStartupMessages(library(RColorBrewer))
suppressPackageStartupMessages(library(gridExtra))
suppressPackageStartupMessages(library(cowplot))
suppressPackageStartupMessages(library(dplyr))
suppressPackageStartupMessages(library(impactSingleCellToolkit))
source("resources/functions_workshop.R")
load("objects/processed_objects.RData")
# T cell CSF object
tcell_obj_csf <- tcell_obj[,names(tcell_obj$status[tcell_obj$status %in% "Disease"])]
tcell_obj_csf <- tcell_obj_csf[,names(tcell_obj_csf$compartment[tcell_obj_csf$compartment %in% "CSF"])]
tcell_obj_csf$expansion_category[tcell_obj_csf$expansion_category %in% c("High","Moderate")] <- "Expanded"
tcell_obj_csf$expansion.subtype <- paste(tcell_obj_csf$expansion_category,tcell_obj_csf$sub.celltype.annot)
# B cell object
bcell_obj_csfpb <- bcell_obj
bcell_obj_csfpb$expansion_category[bcell_obj_csfpb$expansion_category %in% c("High","Moderate")] <- "Expanded"
bcell_obj_csfpb$expansion.subtype <- paste(bcell_obj_csfpb$expansion_category,bcell_obj_csfpb$sub.celltype.annot)
genes_bcells <- row.names(bcell_obj_csfpb)
vgenes_bcells <- sort(genes_bcells[grep("IGHV",genes_bcells)])
h1 <- dittoHeatmap(bcell_obj_csfpb, genes = vgenes_bcells,
annot.by = c("sub.celltype.annot","compartment","status"),
scaled.to.max = TRUE,
show_colnames = FALSE,
show_rownames = TRUE,
cluster_cols = TRUE,
main = "V gene usage B cells")
genes_bcells <- row.names(bcell_obj_csfpb)
other_ig_genes_bcells <- sort(genes_bcells[grep("IGHA|IGHG|IGHM|IGHD",genes_bcells)])
h2 <- dittoHeatmap(bcell_obj_csfpb, genes = other_ig_genes_bcells,
annot.by = c("sub.celltype.annot","compartment","status"),
scaled.to.max = TRUE,
show_colnames = FALSE,
show_rownames = TRUE,
cluster_cols = FALSE,
main = "Ig gene usage B cells")
load("objects/csf_vs_pb_DGE.RData")
TOP_N <- 100
# Tcells
Idents(tcell_obj) <- tcell_obj$compartment
levels(tcell_obj) <- c("CSF","PB")
dge_compartment_tcells_all <- FindAllMarkers(tcell_obj,logfc.threshold = 0.0,test.use = "wilcox")
dge_compartment_tcells <- dge_compartment_tcells_all[dge_compartment_tcells_all$p_val_adj < 0.05,]
top_genes_tcells <- dge_compartment_tcells %>% group_by(cluster) %>% top_n(TOP_N, avg_log2FC)
# B cells
Idents(bcell_obj) <- bcell_obj$compartment
levels(bcell_obj) <- c("CSF","PB")
dge_compartment_bcells_all <- FindAllMarkers(bcell_obj,logfc.threshold = 0.0,test.use = "wilcox")
dge_compartment_bcells <- dge_compartment_bcells_all[dge_compartment_bcells_all$p_val_adj < 0.05,]
top_genes_bcells <- dge_compartment_bcells %>% group_by(cluster) %>% top_n(TOP_N, avg_log2FC)
p1 <- dittoHeatmap(tcell_obj, genes = top_genes_tcells$gene,
annot.by = c("compartment","status", "sub.celltype.annot"),
scaled.to.max = TRUE,
show_colnames = FALSE,
show_rownames = FALSE,
cluster_cols = FALSE,
main = "CSF vs. PB T cells")
# CSF vs PB
p1.2 <- DoHeatmap(tcell_obj,features = top_genes_tcells$gene)
## Warning in DoHeatmap(tcell_obj, features = top_genes_tcells$gene): The
## following features were omitted as they were not found in the scale.data slot
## for the SCT assay: KRT72, SH3BGRL2, AP000547.3, PADI4, CLEC11A, ITGA2B, KRT2,
## CASC1, MEST, AL034550.2, DBN1, SYNGR1, TREML1, LINC02295, PROK2, OSBPL5, REG4,
## CES1, AL109767.1, KRT73-AS1, SLC22A17, MZF1-AS1, PCSK5, SNHG19, AC004832.6,
## MMP28, ZBTB10, CD248, AF213884.3, C6orf99, EFCAB2, AL121944.1, AK5, LRRN3, NOG,
## COL18A1, KIF9, MXD4, CIC, FAM129B, DOPEY2, PERP, ALCAM, SLC25A1, MKNK2,
## ARHGAP35, ALDOC, RNPEPL1, DHCR7, TMEM11, CALHM2, SNAI3, TLNRD1, CXCR6, SS18L2,
## ERG28, SKIL, TNFSF14, CCR5, PDCD1, LTC4S, HMGCR, BLOC1S1, FASN, DHCR24, DYNLT1,
## SCD
p1
p1.2
p2 <- dittoHeatmap(bcell_obj, genes = top_genes_bcells$gene,
annot.by = c("compartment","status", "sub.celltype.annot"),
scaled.to.max = TRUE,
show_colnames = FALSE,
show_rownames = FALSE,
cluster_cols = FALSE,
main = "CSF vs. PB B cells")
# CSF vs PB
p2.2 <- DoHeatmap(bcell_obj,features = top_genes_bcells$gene)
## Warning in DoHeatmap(bcell_obj, features = top_genes_bcells$gene): The
## following features were omitted as they were not found in the scale.data slot
## for the SCT assay: NPM1, GPSM3, BTF3, RPL31, PFDN5, RPL4, CCNI, CIRBP, PCBP2,
## NACA, PPDPF, NOP53, RPL22, EEF1D, RACK1, RPL10A, FAU, RPL23A, RPL35A, RPS9,
## EEF2, RPL18, RPL5, RPL9, RPS5, RPLP2, RPL8, RPSA, S100A13, RGS16, ARID3A,
## EEF1B2, RPS25, RPL36, UBA52, RPL6, RPS4X, RPL24, RPL3, RPL14, RPS24, OSTN-AS1,
## RPS16, ACY3, RPL37A, RPL21, RPS15, RPS6, RPS7, GCAT, EPB41L3, TYMS, AC015849.1,
## ATP8B4, ZMYND19, GPR34, KHDC1, PTPN13, CAMK1, RAPH1, DAPL1, MIXL1, WLS, RPL15,
## RPL29, MAFF, TGM2, ENDOG, KCNK12, RPL12, RPS21, RPL7A, IGHG4, AXL, FCRL4
p2
p2.2
v1 <- plotDGEvolcano(dge_compartment_bcells_all,YLIM = c(0,50),title = "B cell DGE CSF vs. PB",cluster_filt = "CSF",
p.adj.thresh = 0.05,show_top_genes = 20)
v2 <- plotDGEvolcano(dge_compartment_tcells_all,YLIM = c(0,250),title = "T cell DGE CSF vs. PB",cluster_filt = "CSF",
p.adj.thresh = 0.05,show_top_genes = 20)
plot_grid(v1,v2,nrow = 1)
TOP_N <- 12
Idents(tcell_obj_csf) <- tcell_obj_csf$expansion.subtype
levels(tcell_obj_csf) <- c("Expanded CD8 T cells","Non-expanded CD8 T cells","Expanded CD4 T cells","Non-expanded CD4 T cells")
dge_tcell_csf <- FindAllMarkers(tcell_obj_csf,logfc.threshold = 0.1,test.use = "wilcox")
## Calculating cluster Expanded CD8 T cells
## For a (much!) faster implementation of the Wilcoxon Rank Sum Test,
## (default method for FindMarkers) please install the presto package
## --------------------------------------------
## install.packages('devtools')
## devtools::install_github('immunogenomics/presto')
## --------------------------------------------
## After installation of presto, Seurat will automatically use the more
## efficient implementation (no further action necessary).
## This message will be shown once per session
## Calculating cluster Non-expanded CD8 T cells
## Calculating cluster Expanded CD4 T cells
## Calculating cluster Non-expanded CD4 T cells
dge_tcell_csf <- dge_tcell_csf[dge_tcell_csf$p_val_adj < 0.05,]
top_genes_tcells_csf <- dge_tcell_csf %>% group_by(cluster) %>% top_n(TOP_N, avg_log2FC)
DotPlot(object = tcell_obj_csf,features = unique(top_genes_tcells_csf$gene)) + theme(axis.text.x = element_text(angle = 90, hjust=1)) + ggtitle("T cells CSF High vs. Non-Expanded") + coord_flip()
TOP_N <- 10
Idents(bcell_obj_csfpb) <- bcell_obj_csfpb$expansion.subtype
complete_order <- c("Expanded Plasma cells","Non-expanded Plasma cells",
"Expanded Memory B-cells","Non-expanded Memory B-cells",
"Expanded naive B-cells","Non-expanded naive B-cells")
complete_order_sub <- complete_order[complete_order %in% unique(bcell_obj_csfpb$expansion.subtype)]
levels(bcell_obj_csfpb) <- complete_order_sub
dge_bcell_csfpb <- FindAllMarkers(bcell_obj_csfpb,logfc.threshold = 0.1,test.use = "wilcox")
## Calculating cluster Expanded Memory B-cells
## Calculating cluster Non-expanded Memory B-cells
## Calculating cluster Expanded naive B-cells
## Calculating cluster Non-expanded naive B-cells
dge_bcell_csfpb <- dge_bcell_csfpb[dge_bcell_csfpb$p_val_adj < 0.05,]
top_genes_bcells_csfpb <- dge_bcell_csfpb %>% group_by(cluster) %>% top_n(TOP_N, avg_log2FC)
DotPlot(object = bcell_obj_csfpb,features = unique(top_genes_bcells_csfpb$gene)) + theme(axis.text.x = element_text(angle = 90, hjust=1)) + ggtitle("B cells CSF High vs. Non-Expanded") + coord_flip()
suppressPackageStartupMessages(library(ReactomePA))
suppressPackageStartupMessages(library(org.Hs.eg.db))
suppressPackageStartupMessages(library(igraph))
suppressPackageStartupMessages(library(enrichplot))
suppressPackageStartupMessages(library(ggnewscale))
suppressPackageStartupMessages(library(DOSE))
suppressPackageStartupMessages(library(clusterProfiler))
suppressPackageStartupMessages(library(ggupset))
# Most of the expression should be in the target cluster
# - pct.1 = proportion of expression of the gene within the target cluster group
# - pct.2 = proportion of expression of the gene within everything outside the target cluster
target_genes <- dge_bcell_csfpb[dge_bcell_csfpb$cluster %in% c("Expanded Memory B-cells","Non-expanded Memory B-cells"),]
# Convert gene names to Entrez IDs
de_genes <- mapIds(org.Hs.eg.db, target_genes$gene, 'ENTREZID', 'SYMBOL')
## 'select()' returned 1:1 mapping between keys and columns
de_genes <- de_genes[!is.na(de_genes)]
de <- as.character(de_genes)
# Create FC and P-adj-value vectors for enrihed genes
de_fc_df <- target_genes[target_genes$gene %in% names(de_genes),]
entrez_col <- c()
for (g in names(de_genes)) {
egene <- as.character(de_genes[g])
entrez_col <- c(entrez_col,egene)
}
de_fc_df$entrez_id <- entrez_col
de_fc_list <- de_fc_df$avg_log2FC
names(de_fc_list) <- de_fc_df$entrez_id
# P-value vec
de_pval_list <- de_fc_df$p_val_adj
names(de_pval_list) <- de_fc_df$entrez_id
# Pathway Enrichment
bcell_pathways <- enrichPathway(gene=de,pvalueCutoff=0.05, readable=T)
bcell_pathways_df <- as.data.frame(bcell_pathways)
# Top enriched pathways and the gene count within each pathway
barplot(bcell_pathways, showCategory = 14)
edo <- enrichDGN(as.list(de))
de_fc_list <- sort(de_fc_list,decreasing = T)
## convert gene ID to Symbol
edox <- setReadable(edo, 'org.Hs.eg.db', 'ENTREZID')
## Network categorySize can be scaled by 'pvalue' or 'geneNum'
p1_bcell <- cnetplot(edox, categorySize="pvalue", foldChange=de_fc_list)
## Warning in cnetplot.enrichResult(x, ...): Use 'color.params = list(foldChange = your_value)' instead of 'foldChange'.
## The foldChange parameter will be removed in the next version.
## Scale for size is already present.
## Adding another scale for size, which will replace the existing scale.
## Heatmap
p2_bcell <- heatplot(edox, foldChange=de_fc_list, showCategory=14)
plot_grid(p1_bcell,p2_bcell,ncol = 2)
# - pct.1 = proportion of expression of the gene within the target cluster group
# - pct.2 = proportion of expression of the gene within everything outside the target cluster
target_genes <- dge_tcell_csf[dge_tcell_csf$cluster %in% c("Expanded CD8 T cells","Non-expanded CD8 T cells"),]
# Convert gene names to Entrez IDs
de_genes <- mapIds(org.Hs.eg.db, target_genes$gene, 'ENTREZID', 'SYMBOL')
## 'select()' returned 1:1 mapping between keys and columns
de_genes <- de_genes[!is.na(de_genes)]
de <- as.character(de_genes)
# Create FC and P-adj-value vectors for enrihed genes
de_fc_df <- target_genes[target_genes$gene %in% names(de_genes),]
entrez_col <- c()
for (g in names(de_genes)) {
egene <- as.character(de_genes[g])
entrez_col <- c(entrez_col,egene)
}
de_fc_df$entrez_id <- entrez_col
de_fc_list <- de_fc_df$avg_log2FC
names(de_fc_list) <- de_fc_df$entrez_id
# P-value vec
de_pval_list <- de_fc_df$p_val_adj
names(de_pval_list) <- de_fc_df$entrez_id
tcell_pathways <- enrichPathway(gene=de,pvalueCutoff=0.05, readable=T)
tcell_pathways_df <- as.data.frame(tcell_pathways)
barplot(tcell_pathways, showCategory=10)
edo <- enrichDGN(as.list(de))
de_fc_list <- sort(de_fc_list,decreasing = T)
## convert gene ID to Symbol
edox <- setReadable(edo, 'org.Hs.eg.db', 'ENTREZID')
## Network categorySize can be scaled by 'pvalue' or 'geneNum'
p1_tcell <- cnetplot(edox, categorySize="pvalue", foldChange=de_fc_list)
## Warning in cnetplot.enrichResult(x, ...): Use 'color.params = list(foldChange = your_value)' instead of 'foldChange'.
## The foldChange parameter will be removed in the next version.
## Scale for size is already present.
## Adding another scale for size, which will replace the existing scale.
## Heatmap
p2_tcell <- heatplot(edox, foldChange=de_fc_list, showCategory=25)
plot_grid(p1_tcell,p2_tcell,ncol = 2)